{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import sys\n",
    "\n",
    "# optimus\n",
    "sys.path.append(\"..\")\n",
    "\n",
    "# pypika\n",
    "sys.path.append(\"../../pypika\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pypika import Query, Table, Field, analytics as an\n",
    "from pypika.dialects import MySQLQuery as Query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SELECT MIN('RATING'),MAX('RATING') FROM `main.music`\n"
     ]
    }
   ],
   "source": [
    "agg = [an.Min('RATING'),an.Max('RATING')]\n",
    "q = Query.from_('main.music').select(*agg)\n",
    "print(q.get_sql())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'SELECT MIN(RATING) FROM main.music'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\"SELECT MIN(RATING) FROM main.music\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SELECT `fruits`.`id`,`fruits`.`name` FROM `fruits` JOIN `consumers` ON `fruits`.`consumer_id`=`consumers`.`id` WHERE `fruits`.`harvest_date`+INTERVAL 1 DAY<NOW()\n"
     ]
    }
   ],
   "source": [
    "from pypika import functions as fn\n",
    "from pypika import Tables, MySQLQuery, Interval\n",
    "\n",
    "fruits, consumers = Tables('fruits', 'consumers')\n",
    "q = MySQLQuery.from_(fruits) \\\n",
    "    .join(consumers) \\\n",
    "    .on(fruits.consumer_id == consumers.id) \\\n",
    "    .select(fruits.id, fruits.name) \\\n",
    "    .where((fruits.harvest_date + Interval(days=1)) < fn.Now())\n",
    "print(q.get_sql())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\socks.py:58: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working\n",
      "  from collections import Callable\n",
      "\n",
      "    You are using PySparkling of version 2.4.10, but your PySpark is of\n",
      "    version 2.3.1. Please make sure Spark and PySparkling versions are compatible. \n",
      "INFO:optimus:Operative System:Windows\n",
      "INFO:optimus:Just check that Spark and all necessary environments vars are present...\n",
      "INFO:optimus:-----\n",
      "INFO:optimus:SPARK_HOME=C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\n",
      "INFO:optimus:HADOOP_HOME=C:\\opt\\hadoop-2.7.7\n",
      "INFO:optimus:PYSPARK_PYTHON=C:\\Users\\argenisleon\\Anaconda3\\python.exe\n",
      "INFO:optimus:PYSPARK_DRIVER_PYTHON=jupyter\n",
      "INFO:optimus:PYSPARK_SUBMIT_ARGS=--jars \"file:///C:/Users/argenisleon/Documents/Optimus/optimus/jars/RedshiftJDBC42-1.2.16.1027.jar,file:///C:/Users/argenisleon/Documents/Optimus/optimus/jars/mysql-connector-java-8.0.16.jar,file:///C:/Users/argenisleon/Documents/Optimus/optimus/jars/ojdbc8.jar,file:///C:/Users/argenisleon/Documents/Optimus/optimus/jars/postgresql-42.2.5.jar\" --driver-class-path \"C:/Users/argenisleon/Documents/Optimus/optimus/jars/RedshiftJDBC42-1.2.16.1027.jar;C:/Users/argenisleon/Documents/Optimus/optimus/jars/mysql-connector-java-8.0.16.jar;C:/Users/argenisleon/Documents/Optimus/optimus/jars/ojdbc8.jar;C:/Users/argenisleon/Documents/Optimus/optimus/jars/postgresql-42.2.5.jar\" --conf \"spark.sql.catalogImplementation=hive\" pyspark-shell\n",
      "INFO:optimus:JAVA_HOME=C:\\java\n",
      "INFO:optimus:Pyarrow Installed\n",
      "INFO:optimus:-----\n",
      "INFO:optimus:Starting or getting SparkSession and SparkContext...\n",
      "INFO:optimus:Spark Version:2.3.1\n",
      "INFO:optimus:\n",
      "                             ____        __  _                     \n",
      "                            / __ \\____  / /_(_)___ ___  __  _______\n",
      "                           / / / / __ \\/ __/ / __ `__ \\/ / / / ___/\n",
      "                          / /_/ / /_/ / /_/ / / / / / / /_/ (__  ) \n",
      "                          \\____/ .___/\\__/_/_/ /_/ /_/\\__,_/____/  \n",
      "                              /_/                                  \n",
      "                              \n",
      "INFO:optimus:Transform and Roll out...\n",
      "INFO:optimus:Optimus successfully imported. Have fun :).\n",
      "INFO:optimus:Config.ini not found\n"
     ]
    }
   ],
   "source": [
    "from optimus import Optimus\n",
    "op= Optimus(master=\"local\", app_name= \"optimus\", verbose = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = op.load.csv(\"data/Meteorite_Landings.csv\").h_repartition()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+-----+--------+--------+--------+-----+--------------------+----------+----------+--------------------+\n",
      "|                name|   id|nametype|recclass|mass (g)| fall|                year|    reclat|   reclong|         GeoLocation|\n",
      "+--------------------+-----+--------+--------+--------+-----+--------------------+----------+----------+--------------------+\n",
      "|Dominion Range 08357|52132|   Valid|      L6|     8.9|Found|01/01/2008 12:00:...|       0.0|       0.0|(0.000000, 0.000000)|\n",
      "|       Yamato 792863|28212|   Valid|      H5|  132.25|Found|01/01/1979 12:00:...|     -71.5|  35.66667|(-71.500000, 35.6...|\n",
      "|           Acfer 232|  240|   Valid|      H5|   725.0|Found|01/01/1991 12:00:...|  27.73944|   4.32833|(27.739440, 4.328...|\n",
      "|Jiddat al Harasis...|56470|   Valid|      L5|    17.5|Found|                null|  19.83528|  56.46139|(19.835280, 56.46...|\n",
      "|               Imlay|52855|   Valid|      L5|   770.0|Found|01/01/2009 12:00:...|  40.74018|-118.17285|(40.740180, -118....|\n",
      "|           Shişr 029|23564|   Valid|      H5|  204.71|Found|01/01/1998 12:00:...|   18.1501|  53.80047|(18.150100, 53.80...|\n",
      "|Northwest Africa ...|31272|   Valid| Eucrite|   138.0|Found|01/01/2004 12:00:...|      null|      null|                null|\n",
      "|         Dhofar 1462|55271|   Valid|     L~4|   400.8|Found|01/01/2008 12:00:...|  18.34955|  54.25013|(18.349550, 54.25...|\n",
      "|          Dhofar 085| 6784|   Valid|      H4|    78.0|Found|01/01/1999 12:00:...|  19.21233|   54.8475|(19.212330, 54.84...|\n",
      "|Grove Mountains 0...|50563|   Valid|      L6|     1.8|Found|01/01/2006 12:00:...| -72.78194|  75.30056|(-72.781940, 75.3...|\n",
      "|Grove Mountains 0...|46973|   Valid|      H4|    1.55|Found|01/01/2006 12:00:...| -72.77833|  75.32167|(-72.778330, 75.3...|\n",
      "|Queen Alexandra R...|20352|   Valid|      H6|     5.9|Found|01/01/1994 12:00:...|     -84.0|     168.0|(-84.000000, 168....|\n",
      "|Grove Mountains 0...|48024|   Valid|      H5|    1.01|Found|01/01/2003 12:00:...|-72.773333| 75.326944|(-72.773333, 75.3...|\n",
      "|LaPaz Icefield 03...|34826|   Valid|      L5|    28.2|Found|01/01/2003 12:00:...|      null|      null|                null|\n",
      "|Northwest Africa ...|31200|   Valid|    L5/6|  1200.0|Found|01/01/2002 12:00:...|      null|      null|                null|\n",
      "|Elephant Moraine ...| 9762|   Valid|      H5|    11.2|Found|01/01/1996 12:00:...| -76.18333| 157.16667|(-76.183330, 157....|\n",
      "| Miller Range 090027|53340|   Valid|     LL5|   981.2|Found|01/01/2009 12:00:...|       0.0|       0.0|(0.000000, 0.000000)|\n",
      "| Miller Range 090895|54418|   Valid|      H5|     5.3|Found|01/01/2009 12:00:...|       0.0|       0.0|(0.000000, 0.000000)|\n",
      "|        San Juan 043|52381|   Valid|      H5|    26.4|Found|01/01/2009 12:00:...|-25.443167|  -69.8825|(-25.443167, -69....|\n",
      "|  Miller Range 05057|44456|   Valid|     LL6|   472.2|Found|01/01/2005 12:00:...|      null|      null|                null|\n",
      "+--------------------+-----+--------+--------+--------+-----+--------------------+----------+----------+--------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---+-------+\n",
      "|  a|max(id)|\n",
      "+---+-------+\n",
      "|  1|  57458|\n",
      "+---+-------+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from pypika import Tables, MySQLQuery, Interval\n",
    "\n",
    "df.set_name(\"df\")\n",
    "\n",
    "agg = [an.Min('id').as_('a'),an.Max('id')]\n",
    "\n",
    "q = MySQLQuery.from_(\"df\").select(*agg) \n",
    "\n",
    "\n",
    "def clean(c):\n",
    "    return q.get_sql().replace(\"'\",\"`\")\n",
    "\n",
    "df.query(clean(q)).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "MIN('id')\n",
      "SELECT MIN(`id`) `Min_id` FROM `df`\n",
      "+------+\n",
      "|Min_id|\n",
      "+------+\n",
      "|     1|\n",
      "+------+\n",
      "\n"
     ]
    },
    {
     "ename": "AssertionError",
     "evalue": "all exprs should be Column",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mAssertionError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-45-7de2afff22f7>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmin_sql\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"id\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m     47\u001b[0m         \u001b[1;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     48\u001b[0m             \u001b[0mstart_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 49\u001b[1;33m             \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     50\u001b[0m             \u001b[0m_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     51\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mlog_time\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\columns.py\u001b[0m in \u001b[0;36mmin_sql\u001b[1;34m(columns)\u001b[0m\n\u001b[0;32m    578\u001b[0m         \u001b[1;33m:\u001b[0m\u001b[1;32mreturn\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    579\u001b[0m         \"\"\"\n\u001b[1;32m--> 580\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0magg_exprs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0man\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mMin\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    581\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    582\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\columns.py\u001b[0m in \u001b[0;36magg_exprs\u001b[1;34m(columns, funcs, tidy, *args)\u001b[0m\n\u001b[0;32m    533\u001b[0m         \u001b[1;33m:\u001b[0m\u001b[1;32mreturn\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    534\u001b[0m         \"\"\"\n\u001b[1;32m--> 535\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mexec_agg\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcreate_exprs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfuncs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtidy\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    536\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    537\u001b[0m     \u001b[1;33m@\u001b[0m\u001b[0madd_attr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\columns.py\u001b[0m in \u001b[0;36mexec_agg\u001b[1;34m(exprs, tidy)\u001b[0m\n\u001b[0;32m    553\u001b[0m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mquery\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mexprs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshow\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    554\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 555\u001b[1;33m         \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0magg\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0mexprs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    556\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    557\u001b[0m         \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mparse_col_names_funcs_to_keys\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_json\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\dataframe.py\u001b[0m in \u001b[0;36magg\u001b[1;34m(self, *exprs)\u001b[0m\n\u001b[0;32m   1325\u001b[0m         \u001b[1;33m[\u001b[0m\u001b[0mRow\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mage\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1326\u001b[0m         \"\"\"\n\u001b[1;32m-> 1327\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgroupBy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0magg\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0mexprs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1328\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1329\u001b[0m     \u001b[1;33m@\u001b[0m\u001b[0msince\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m2.0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\group.py\u001b[0m in \u001b[0;36magg\u001b[1;34m(self, *exprs)\u001b[0m\n\u001b[0;32m     89\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     90\u001b[0m             \u001b[1;31m# Columns\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 91\u001b[1;33m             \u001b[1;32massert\u001b[0m \u001b[0mall\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mColumn\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mc\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mexprs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"all exprs should be Column\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     92\u001b[0m             jdf = self._jgd.agg(exprs[0]._jc,\n\u001b[0;32m     93\u001b[0m                                 _to_seq(self.sql_ctx._sc, [c._jc for c in exprs[1:]]))\n",
      "\u001b[1;31mAssertionError\u001b[0m: all exprs should be Column"
     ]
    }
   ],
   "source": [
    "df.cols.min_sql(\"id\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "57458"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.cols.max(\"id\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'name': 'string',\n",
       " 'id': 'int',\n",
       " 'nametype': 'string',\n",
       " 'recclass': 'string',\n",
       " 'mass (g)': 'double',\n",
       " 'fall': 'string',\n",
       " 'year': 'string',\n",
       " 'reclat': 'double',\n",
       " 'reclong': 'double',\n",
       " 'GeoLocation': 'string'}"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.cols.dtypes()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': 0, 'mass (g)': 19}"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.cols.count_zeros([\"id\",\"mass (g)\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': 42365, 'mass (g)': 12497}"
      ]
     },
     "execution_count": 111,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.cols.count_uniques([\"id\",\"mass (g)\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------+-----------+\n",
      "|count(1)|   recclass|\n",
      "+--------+-----------+\n",
      "|    8285|         L6|\n",
      "|    7142|         H5|\n",
      "|    4796|         L5|\n",
      "|    4528|         H6|\n",
      "|    4211|         H4|\n",
      "|    2766|        LL5|\n",
      "|    2043|        LL6|\n",
      "|    1253|         L4|\n",
      "|     428|       H4/5|\n",
      "|     416|        CM2|\n",
      "|     386|         H3|\n",
      "|     365|         L3|\n",
      "|     335|        CO3|\n",
      "|     300|   Ureilite|\n",
      "|     285|Iron, IIIAB|\n",
      "|     268|        LL4|\n",
      "|     256|        CV3|\n",
      "|     241|  Diogenite|\n",
      "|     240|  Howardite|\n",
      "|     225|         LL|\n",
      "+--------+-----------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df.query(\"SELECT COUNT(*), recclass from df GROUP BY recclass ORDER BY COUNT(*) DESC\").show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
